Hands-on Exercise 3

Author

Shaun Tan

Published

April 26, 2023

Getting Started

Installing and launching R packages

The code chunk below uses p_load() of pacman package to check if the required libraries are installed on the computer. If they are, then they will be launched into R.

pacman::p_load(ggiraph, plotly, patchwork, DT, tidyverse)

Importing the data

exam_data <- read_csv("data/Exam_data.csv")

Examining the data

summary(exam_data)
      ID               CLASS              GENDER              RACE          
 Length:322         Length:322         Length:322         Length:322        
 Class :character   Class :character   Class :character   Class :character  
 Mode  :character   Mode  :character   Mode  :character   Mode  :character  
                                                                            
                                                                            
                                                                            
    ENGLISH          MATHS          SCIENCE     
 Min.   :21.00   Min.   : 9.00   Min.   :15.00  
 1st Qu.:59.00   1st Qu.:58.00   1st Qu.:49.25  
 Median :70.00   Median :74.00   Median :65.00  
 Mean   :67.18   Mean   :69.33   Mean   :61.16  
 3rd Qu.:78.00   3rd Qu.:85.00   3rd Qu.:74.75  
 Max.   :96.00   Max.   :99.00   Max.   :96.00  

Using ggiraph

Tooltip effect with tooltip aesthetic

p <- ggplot(data=exam_data, 
       aes(x = MATHS)) +
  geom_dotplot_interactive(
    aes(tooltip = ID),
    stackgroups = TRUE, 
    binwidth = 1, 
    method = "histodot") +
  scale_y_continuous(NULL, 
                     breaks = NULL)
girafe(
  ggobj = p,
  width_svg = 6,
  height_svg = 6*0.618
)

Displaying multiple information on tooltip

exam_data$tooltip <- c(paste0(     
  "Name = ", exam_data$ID,         
  "\n Class = ", exam_data$CLASS,
  "\n Gender = ", exam_data$GENDER)) 

p <- ggplot(data=exam_data, 
       aes(x = MATHS)) +
  geom_dotplot_interactive(
    aes(tooltip = exam_data$tooltip), 
    stackgroups = TRUE,
    binwidth = 1,
    method = "histodot") +
  scale_y_continuous(NULL,               
                     breaks = NULL)
girafe(
  ggobj = p,
  width_svg = 8,
  height_svg = 8*0.618
)

Customising Tooltip style

Code chunk below uses opts_tooltip() of ggiraph to customize tooltip rendering by add css declarations.

tooltip_css <- "background-color:white; #<<
font-style:bold; color:black;" #<<

p <- ggplot(data=exam_data, 
       aes(x = MATHS)) +
  geom_dotplot_interactive(              
    aes(tooltip = ID),                   
    stackgroups = TRUE,                  
    binwidth = 1,                        
    method = "histodot") +               
  scale_y_continuous(NULL,               
                     breaks = NULL)
girafe(                                  
  ggobj = p,                             
  width_svg = 6,                         
  height_svg = 6*0.618,
  options = list(    #<<
    opts_tooltip(    #<<
      css = tooltip_css)) #<<
)                                        

Displaying statistics on tooltip

Code chunk below shows an advanced way to customise tooltip. In this example, a stat_summary() function is used to compute 90% confident interval of the mean. The derived statistics are then displayed in the tooltip.

tooltip <- function(y, ymax, accuracy = .01) {
  mean <- scales::number(y, accuracy = accuracy)
  sem <- scales::number(ymax - y, accuracy = accuracy)
  paste("Mean maths scores:", mean, "+/-", sem)
}

gg_point <- ggplot(data=exam_data, 
                   aes(x = RACE),
) +
  stat_summary(aes(y = MATHS, 
                   tooltip = after_stat(  
                     tooltip(y, ymax))),  
    fun.data = "mean_se", 
    geom = GeomInteractiveCol,  
    fill = "light blue"
  ) +
  stat_summary(aes(y = MATHS),
    fun.data = mean_se,
    geom = "errorbar", width = 0.2, size = 0.2
  )

girafe(ggobj = gg_point,
       width_svg = 8,
       height_svg = 8*0.618)

Hover effect with data_id aesthetic

Code chunk below shows the second interactive feature of ggiraph, namely data_id.

exam_data$tooltip <- c(paste0(     
  "Class = ", exam_data$CLASS)) 

p <- ggplot(data=exam_data, 
       aes(x = MATHS)) +
  geom_dotplot_interactive(           
    aes(data_id = CLASS, tooltip = exam_data$tooltip),             
    stackgroups = TRUE,               
    binwidth = 1,                        
    method = "histodot") +               
  scale_y_continuous(NULL,               
                     breaks = NULL)
girafe(                                  
  ggobj = p,                             
  width_svg = 6,                         
  height_svg = 6*0.618                      
)                                        

Styling hover effect

In the code chunk below, css codes are used to change the highlighting effect.

exam_data$tooltip <- c(paste0(     
  "Class = ", exam_data$CLASS)) 

p <- ggplot(data=exam_data, 
       aes(x = MATHS)) +
  geom_dotplot_interactive(              
    aes(data_id = CLASS, tooltip=exam_data$tooltip),              
    stackgroups = TRUE,                  
    binwidth = 1,                        
    method = "histodot") +               
  scale_y_continuous(NULL,               
                     breaks = NULL)
girafe(                                  
  ggobj = p,                             
  width_svg = 6,                         
  height_svg = 6*0.618,
  options = list(                        
    opts_hover(css = "fill: #202020;"),  
    opts_hover_inv(css = "opacity:0.2;") 
  )                                        
)                                        

## Click effect with onclick

onclick argument of ggiraph provides hotlink interactivity on the web. The code chunk below shown an example of onclick.

exam_data$onclick <- sprintf("window.open(\"%s%s\")",
"https://open.spotify.com/",
as.character(exam_data$ID))

p <- ggplot(data=exam_data, 
       aes(x = MATHS)) +
  geom_dotplot_interactive(              
    aes(onclick = onclick),              
    stackgroups = TRUE,                  
    binwidth = 1,                        
    method = "histodot") +               
  scale_y_continuous(NULL,               
                     breaks = NULL)
girafe(                                  
  ggobj = p,                             
  width_svg = 6,                         
  height_svg = 6*0.618)                                        

Coordinated Multiple Views with ggiraph

Coordinated multiple views methods has been implemented in the data visualisation below using the interactive function of ggiraph and patchwork function.

p1 <- ggplot(data=exam_data, 
       aes(x = MATHS)) +
  geom_dotplot_interactive(              
    aes(data_id = ID),              
    stackgroups = TRUE,                  
    binwidth = 1,                        
    method = "histodot") +  
  coord_cartesian(xlim=c(0,100)) + 
  scale_y_continuous(NULL,               
                     breaks = NULL)

p2 <- ggplot(data=exam_data, 
       aes(x = ENGLISH)) +
  geom_dotplot_interactive(              
    aes(data_id = ID),              
    stackgroups = TRUE,                  
    binwidth = 1,                        
    method = "histodot") + 
  coord_cartesian(xlim=c(0,100)) + 
  scale_y_continuous(NULL,               
                     breaks = NULL)

girafe(code = print(p1 + p2), 
       width_svg = 6,
       height_svg = 3,
       options = list(
         opts_hover(css = "fill: #202020;"),
         opts_hover_inv(css = "opacity:0.2;")
         )
       ) 

Interactive Data Visualisation - plotly methods!

Creating an interactive scatter plot: plot_ly() method

The tabset below shows an example a basic interactive plot created by using plot_ly().

plot_ly(data = exam_data, 
             x = ~MATHS, 
             y = ~ENGLISH)

Working with a visual variable: plot_ly() method

In the code chunk below, color argument is mapped to a qualitative visual variable (i.e. RACE).

plot_ly(data = exam_data, 
        x = ~ENGLISH, 
        y = ~MATHS, 
        color = ~RACE)

Creating an interactive scatter plot: ggplotly() method

The code chunk below plots an interactive scatter plot by using ggplotly().

p <- ggplot(data=exam_data, 
            aes(x = MATHS,
                y = ENGLISH)) +
  geom_point(size=1) +
  coord_cartesian(xlim=c(0,100),
                  ylim=c(0,100))
ggplotly(p)

Coordinated Multiple Views with plotly

The creation of a coordinated linked plot by using plotly involves three steps:

`. Highlight_key() of plotly package is used as shared data. 2. Two scatterplots will be created by using ggplot2 functions. 3. Subplot() of plotly package is used to place them next to each other side-by-side.

d <- highlight_key(exam_data)
p1 <- ggplot(data=d, 
            aes(x = MATHS,
                y = ENGLISH)) +
  geom_point(size=1) +
  coord_cartesian(xlim=c(0,100),
                  ylim=c(0,100))

p2 <- ggplot(data=d, 
            aes(x = MATHS,
                y = SCIENCE)) +
  geom_point(size=1) +
  coord_cartesian(xlim=c(0,100),
                  ylim=c(0,100))
subplot(ggplotly(p1),
        ggplotly(p2))

Interactive Data Visualisation - crosstalk methods!

Crosstalk is an add-on to the htmlwidgets package. It extends htmlwidgets with a set of classes, functions, and conventions for implementing cross-widget interactions (currently, linked brushing and filtering)

Interactive Data Table: DT package

DT::datatable(exam_data, class= "compact")

Linked brushing: crosstalk method

d <- highlight_key(exam_data) 
p <- ggplot(d, 
            aes(ENGLISH, 
                MATHS)) + 
  geom_point(size=1) +
  coord_cartesian(xlim=c(0,100),
                  ylim=c(0,100))

gg <- highlight(ggplotly(p),        
                "plotly_selected")  

crosstalk::bscols(gg,               
                  DT::datatable(d), 
                  widths = 5)        

Part 2: Programming Animated Statistical Graphics with R

Loading the packages

pacman::p_load(readxl, gifski, gapminder,
               plotly, gganimate, tidyverse)

Importing the Data

col <- c("Country", "Continent")
globalPop <- read_xls("data/GlobalPopulation.xls",
                      sheet="Data") %>%
  mutate_each_(funs(factor(.)), col) %>%
  mutate(Year = as.integer(Year))
summary(globalPop)
        Country          Year          Young             Old       
 Afghanistan:  28   Min.   :1996   Min.   : 15.50   Min.   : 1.00  
 Albania    :  28   1st Qu.:2010   1st Qu.: 25.70   1st Qu.: 6.90  
 Algeria    :  28   Median :2024   Median : 34.30   Median :12.80  
 Andorra    :  28   Mean   :2023   Mean   : 41.66   Mean   :17.93  
 Angola     :  28   3rd Qu.:2038   3rd Qu.: 53.60   3rd Qu.:25.90  
 Anguilla   :  28   Max.   :2050   Max.   :109.20   Max.   :77.10  
 (Other)    :6036                                                  
   Population                Continent   
 Min.   :      3.3   Africa       :1568  
 1st Qu.:    605.9   Asia         :1454  
 Median :   5771.6   Europe       :1344  
 Mean   :  34860.9   North America: 976  
 3rd Qu.:  22711.0   Oceania      : 526  
 Max.   :1807878.6   South America: 336  
                                         

Animated Data Visualisation: gganimate methods

Building a static population bubble plot

In the code chunk below, the basic ggplot2 functions are used to create a static bubble plot.

ggplot(globalPop, aes(x = Old, y = Young, 
                      size = Population, 
                      colour = Country)) +
  geom_point(alpha = 0.7, 
             show.legend = FALSE) +
  scale_colour_manual(values = country_colors) +
  scale_size(range = c(2, 12)) +
  labs(title = 'Year: {frame_time}', 
       x = '% Aged', 
       y = '% Young') 

Building the animated bubble plot

  1. transition_time() of gganimate is used to create transition through distinct states in time (i.e. Year).
  2. ease_aes() is used to control easing of aesthetics. The default is linear. Other methods are: quadratic, cubic, quartic, quintic, sine, circular, exponential, elastic, back, and bounce.
ggplot(globalPop, aes(x = Old, y = Young, 
                      size = Population, 
                      colour = Country)) +
  geom_point(alpha = 0.7, 
             show.legend = FALSE) +
  scale_colour_manual(values = country_colors) +
  scale_size(range = c(2, 12)) +
  labs(title = 'Year: {frame_time}', 
       x = '% Aged', 
       y = '% Young') +
  transition_time(Year) +       
  ease_aes('linear')          

Animated Data Visualisation: plotly

Building an animated bubble plot: ggplotly() method

gg <- ggplot(globalPop, 
       aes(x = Old, 
           y = Young, 
           size = Population, 
           colour = Country)) +
  geom_point(aes(size = Population,
                 frame = Year),
             alpha = 0.7, 
             show.legend = FALSE) +
  scale_colour_manual(values = country_colors) +
  scale_size(range = c(2, 12)) +
  labs(x = '% Aged', 
       y = '% Young')

ggplotly(gg)

Building an animated bubble plot: plot_ly() method

bp <- globalPop %>%
  plot_ly(x = ~Old, 
          y = ~Young, 
          size = ~Population, 
          color = ~Continent, 
          frame = ~Year, 
          text = ~Country, 
          hoverinfo = "text",
          type = 'scatter',
          mode = 'markers'
          )
bp